{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "E:\\Program Files (x86)\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:4: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.\n",
      "  after removing the cwd from sys.path.\n"
     ]
    }
   ],
   "source": [
    "# -*- coding: UTF-8 -*-\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import numpy.core.umath_tests\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "import seaborn as sns\n",
    "sns.set(style=\"whitegrid\")\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import learning_curve\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import mean_squared_error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset into `df`. The path is relative, so it is resolved against\n",
    "# the kernel's current working directory.\n",
    "# NOTE(review): the file name suggests this is the output of an earlier\n",
    "# cleaning step -- confirm where data/cleaned.csv is produced.\n",
    "df = pd.read_csv('data/cleaned.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the model-ready frame `df_encoded` without modifying `df`.\n",
    "# The previous version overwrote df['Gender'] in place, so running this cell a\n",
    "# second time compared the already-encoded 0/1 values with 'M' and silently\n",
    "# turned every row into 0. Leaving `df` untouched makes the cell safe to re-run.\n",
    "le_U_ID = LabelEncoder()\n",
    "user_id_codes = le_U_ID.fit_transform(df['User_ID'])\n",
    "gender_codes = np.where(df['Gender'] == 'M', 1, 0)  # 1 for 'M', 0 for any other value\n",
    "\n",
    "# One-hot encode each categorical column. The explicit prefix gives every dummy\n",
    "# column a unique string name. Without it the columns are named after the raw\n",
    "# category values, which can collide between blocks and can mix string and\n",
    "# non-string names (e.g. if Occupation holds integer codes) -- recent\n",
    "# scikit-learn versions refuse to fit on such frames.\n",
    "df_Age = pd.get_dummies(df['Age'], prefix='Age')\n",
    "df_CC = pd.get_dummies(df['City_Category'], prefix='City_Category')\n",
    "df_SIC = pd.get_dummies(df['Stay_In_Current_City_Years'], prefix='Stay_In_Current_City_Years')\n",
    "df_ocup = pd.get_dummies(df['Occupation'], prefix='Occupation')\n",
    "\n",
    "# Replace the raw categorical columns with their dummy blocks. `assign` on an\n",
    "# existing column keeps its position, so the column order matches the old layout.\n",
    "df_base = (\n",
    "    df.drop(columns=['Age', 'City_Category', 'Stay_In_Current_City_Years', 'Occupation'])\n",
    "      .assign(User_ID=user_id_codes, Gender=gender_codes)\n",
    ")\n",
    "df_encoded = pd.concat([df_base, df_Age, df_CC, df_SIC, df_ocup], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Work on a reproducible 2% random subsample of the encoded data.\n",
    "df_frac = df_encoded.sample(frac=0.02, random_state=100)\n",
    "\n",
    "# Target is the purchase amount; the identifier and product-category columns\n",
    "# are excluded from the feature matrix along with the target itself.\n",
    "y = df_frac['Purchase']\n",
    "X = df_frac.drop(\n",
    "    columns=[\n",
    "        'Purchase',\n",
    "        'User_ID',\n",
    "        'Product_ID',\n",
    "        'Product_Category_1',\n",
    "        'Product_Category_2',\n",
    "        'Product_Category_3',\n",
    "    ]\n",
    ")\n",
    "\n",
    "# Seeded train/test split using train_test_split's default test size.\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Learning curve for a plain linear regression on the training split:\n",
    "# 10-fold cross-validation, scored with negated mean squared error.\n",
    "train_sizes, train_scores, valid_scores = learning_curve(\n",
    "    LinearRegression(),\n",
    "    X_train,\n",
    "    y_train,\n",
    "    cv=10,\n",
    "    scoring='neg_mean_squared_error',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the negated-MSE scores returned by learning_curve into RMSE.\n",
    "# The result is stored under new names instead of overwriting train_scores /\n",
    "# valid_scores: overwriting made a second run of this cell take the square\n",
    "# root of negative numbers and fill the curves with NaN.\n",
    "train_rmse = np.sqrt(-train_scores)\n",
    "valid_rmse = np.sqrt(-valid_scores)\n",
    "\n",
    "# learning_curve returns one row per training-set size and one column per CV\n",
    "# fold, so axis=1 aggregates over the folds.\n",
    "train_scores_mean = np.mean(train_rmse, axis=1)\n",
    "train_scores_std = np.std(train_rmse, axis=1)\n",
    "valid_scores_mean = np.mean(valid_rmse, axis=1)\n",
    "valid_scores_std = np.std(valid_rmse, axis=1)\n",
    "\n",
    "# Each curve and its +/- 1 std band share an explicit colour; previously the\n",
    "# lines used the default colour cycle while the bands were hard-coded, so a\n",
    "# band was not guaranteed to match its line.\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(train_sizes, valid_scores_mean, label='valid', color='b')\n",
    "ax.fill_between(train_sizes, valid_scores_mean - valid_scores_std, valid_scores_mean + valid_scores_std, alpha=0.3, color='b')\n",
    "ax.plot(train_sizes, train_scores_mean, label='train', color='g')\n",
    "ax.fill_between(train_sizes, train_scores_mean - train_scores_std, train_scores_mean + train_scores_std, alpha=0.3, color='g')\n",
    "ax.set_title('Learning curve: LinearRegression')\n",
    "ax.set_xlabel('Number of samples')\n",
    "ax.set_ylabel('RMSE')\n",
    "# The trailing ';' keeps the Legend object's repr out of the cell output.\n",
    "ax.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test set RMSE: 4885.545\n"
     ]
    }
   ],
   "source": [
    "# Fit a linear regression on the training split, then report its RMSE on the\n",
    "# held-out test split.\n",
    "linreg = LinearRegression().fit(X_train, y_train)\n",
    "y_predicted = linreg.predict(X_test)\n",
    "test_rmse = mean_squared_error(y_test, y_predicted) ** 0.5\n",
    "print('Test set RMSE: {:.3f}'.format(test_rmse))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
